In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
seaborn.set_theme()
pd.set_option('future.no_silent_downcasting', True)
orig_input_df = pd.read_csv('./reactors.csv').convert_dtypes()
In [2]:
with open('regions-m49.json') as f:
regions = json.load(f)['regions']
regions = {v: n for n, v in regions.items()}
with open('countries-m49.json') as f:
countries = json.load(f)['countries']
country_subregions = {c['name']: regions[c['subRegion']] for c in countries}
country_regions = {c['name']: regions[c['region']] for c in countries}
In [3]:
country_mapping = {
'Czech Republic': 'Czechia',
'Iran': 'Iran (Islamic Republic of)',
'Russia': 'Russian Federation',
'South Korea': 'Republic of Korea',
'Taiwan': 'China', # UN is an abomination
'Turkey': 'T\u00fcrkiye',
'United Kingdom': 'United Kingdom of Great Britain and Northern Ireland',
'United States': 'United States of America',
}
max_year = 2023
years = range(1954, max_year + 1)
copyright_text = 'CC BY-SA 4.0 Andrey Upadyshev (image) and\nWikipedia, List of commercial nuclear reactors (data)'
copyright_font_size = 10
In [4]:
input_df = orig_input_df[orig_input_df['Begin building'].notna() & orig_input_df['Begin building'] <= max_year].reset_index(drop=True)
input_df.loc[input_df['Commercial operation'] > max_year, 'Commercial operation'] = None
input_df.loc[input_df['Closed'] > max_year, 'Closed'] = None
input_df['SubRegion'] = input_df['Country'].map(lambda x: country_subregions[country_mapping.get(x, x)])
input_df['Region'] = input_df['Country'].map(lambda x: country_regions[country_mapping.get(x, x)])
input_df['Operated closed'] = input_df['Closed'].where(input_df['Commercial operation'].notna(), None)
In [5]:
def format_years_ticks(ax):
for label in ax.get_xticklabels():
if label.get_text() in ('1957', '1979', '1986', '2011'):
label.set_color('red')
def format_number_plants_ticks(ax, ymin, ymax):
def fmt(val, pos):
return int(abs(val))
ax.yaxis.set_major_formatter(fmt)
ax.set_ylim(-ymin, ymax)
World stats¶
In [6]:
num_building_started = input_df.groupby(['Begin building'])['Begin building'].count()
num_connected = input_df.groupby(['Commercial operation'])['Commercial operation'].count()
num_closed = input_df.groupby(['Closed'])['Closed'].count()
num_operated_closed = input_df.groupby(['Operated closed'])['Operated closed'].count()
max_building_started = num_building_started.max()
max_num_closed = num_closed.max()
ymax = (max_building_started + 4) // 5 * 5
ymin = (max_num_closed + 4) // 5 * 5
combined = pd.concat([
num_building_started,
num_connected,
num_closed,
num_operated_closed,
], axis=1)
combined = combined.reindex(years, fill_value=0)
combined['Commercial operation tot'] = combined['Commercial operation'].fillna(0).cumsum()
combined['Operated closed tot'] = combined['Operated closed'].fillna(0).cumsum()
combined['In operation'] = combined['Commercial operation tot'] - combined['Operated closed tot']
fig, ax = plt.subplots(2, 1, figsize=(11, 10))
ax1, ax2 = ax
combined['Begin building'].plot.bar(ax=ax1, label='Construction started')
(-combined['Closed']).plot.bar(ax=ax1, color='red', label='Closed')
ax1.set_title('World')
ax1.set_xlabel(None)
format_years_ticks(ax1)
format_number_plants_ticks(ax1, ymin, ymax)
ax1.text(0, -ymin, copyright_text, fontsize=copyright_font_size, verticalalignment='bottom')
ax1.legend()
combined['In operation'].plot.bar(ax=ax2, label='Number in operation')
combined['Commercial operation'].plot.bar(ax=ax2, color='black', label='Operation started')
format_years_ticks(ax2)
#ax2.text(0, ax2.get_ylim()[1], copyright_text, fontsize=copyright_font_size, verticalalignment='top')
ax2.legend()
fig.tight_layout()
Region stats¶
In [7]:
total_constructed_by_region = input_df.groupby(['Region'])[['Begin building', 'Commercial operation', 'Closed']].count().reset_index().sort_values('Begin building', ascending=False).reset_index(drop=True)
total_constructed_by_region
Out[7]:
| Region | Begin building | Commercial operation | Closed | |
|---|---|---|---|---|
| 0 | Europe | 335 | 293 | 145 |
| 1 | Asia | 235 | 184 | 39 |
| 2 | Americas | 176 | 166 | 54 |
| 3 | Africa | 5 | 2 | 0 |
In [8]:
num_building_started = input_df.groupby(['Region', 'Begin building'])['Begin building'].count()
num_connected = input_df.groupby(['Region', 'Commercial operation'])['Commercial operation'].count()
num_closed = input_df.groupby(['Region', 'Closed'])['Closed'].count()
num_operated_closed = input_df.groupby(['Region', 'Operated closed'])['Operated closed'].count()
#top_regions = total_constructed_by_region[total_constructed_by_region['Begin building'] > 10]['Region'].to_list()
top_regions = total_constructed_by_region['Region'].to_list()
regions = sorted(c for c in input_df['Region'].unique() if c in num_building_started)
max_building_started = num_building_started.max()
max_num_closed = num_closed.max()
ymax = (max_building_started + 4) // 5 * 5
ymin = (max_num_closed + 4) // 5 * 5
tot_ymax = 250 # TODO: calculate from data
for c in top_regions:
combined = pd.concat([
num_building_started[c],
num_connected[c] if c in num_connected else pd.DataFrame(columns=['Commercial operation']),
num_closed[c] if c in num_closed else pd.DataFrame(columns=['Closed']),
num_operated_closed[c] if c in num_operated_closed else pd.DataFrame(columns=['Operated closed']),
], axis=1)
combined = combined.reindex(years, fill_value=0)
combined['Commercial operation tot'] = combined['Commercial operation'].fillna(0).cumsum()
combined['Operated closed tot'] = combined['Operated closed'].fillna(0).cumsum()
combined['In operation'] = combined['Commercial operation tot'] - combined['Operated closed tot']
fig, ax = plt.subplots(2, 1, figsize=(11, 10))
ax1, ax2 = ax
combined['Begin building'].plot.bar(ax=ax1, label='Construction started')
(-combined['Closed']).plot.bar(ax=ax1, color='red', label='Closed')
ax1.set_title(f'{c} (UN M49)')
ax1.set_xlabel(None)
format_years_ticks(ax1)
format_number_plants_ticks(ax1, ymin, ymax)
ax1.text(0, -ymin, copyright_text, fontsize=copyright_font_size, verticalalignment='bottom')
ax1.legend()
combined['In operation'].plot.bar(ax=ax2, label='Number in operation')
combined['Commercial operation'].plot.bar(ax=ax2, color='black', label='Operation started')
format_years_ticks(ax2)
ax2.set_ylim(0, tot_ymax)
#ax2.text(0, ax2.get_ylim()[1], copyright_text, fontsize=copyright_font_size, verticalalignment='top')
ax2.legend()
fig.tight_layout()
Subregion stats¶
In [9]:
total_constructed_by_subregion = input_df.groupby(['SubRegion'])[['Begin building', 'Commercial operation', 'Closed']].count().reset_index().sort_values('Begin building', ascending=False).reset_index(drop=True)
total_constructed_by_subregion
Out[9]:
| SubRegion | Begin building | Commercial operation | Closed | |
|---|---|---|---|---|
| 0 | Eastern Asia | 181 | 147 | 34 |
| 1 | Northern America | 165 | 159 | 52 |
| 2 | Western Europe | 125 | 120 | 58 |
| 3 | Eastern Europe | 122 | 94 | 29 |
| 4 | Northern Europe | 67 | 64 | 45 |
| 5 | Southern Asia | 42 | 31 | 2 |
| 6 | Southern Europe | 21 | 15 | 13 |
| 7 | Latin America and the Caribbean | 11 | 7 | 2 |
| 8 | Western Asia | 10 | 5 | 1 |
| 9 | Northern Africa | 3 | 0 | 0 |
| 10 | Sub-Saharan Africa | 2 | 2 | 0 |
| 11 | Central Asia | 1 | 1 | 1 |
| 12 | South-eastern Asia | 1 | 0 | 1 |
In [10]:
num_building_started = input_df.groupby(['SubRegion', 'Begin building'])['Begin building'].count()
num_connected = input_df.groupby(['SubRegion', 'Commercial operation'])['Commercial operation'].count()
num_closed = input_df.groupby(['SubRegion', 'Closed'])['Closed'].count()
num_operated_closed = input_df.groupby(['SubRegion', 'Operated closed'])['Operated closed'].count()
sub_regions = sorted(c for c in input_df['SubRegion'].unique() if c in num_building_started)
#top_regions = total_constructed_by_subregion[total_constructed_by_subregion['Begin building'] > 10]['SubRegion'].to_list()
top_regions = total_constructed_by_subregion['SubRegion'].to_list()
max_building_started = num_building_started.max()
max_num_closed = num_closed.max()
ymax = (max_building_started + 4) // 5 * 5
ymin = (max_num_closed + 4) // 5 * 5
tot_ymax = 130 # TODO: calculate from data
for c in top_regions:
combined = pd.concat([
num_building_started[c],
num_connected[c] if c in num_connected else pd.DataFrame(columns=['Commercial operation']),
num_closed[c] if c in num_closed else pd.DataFrame(columns=['Closed']),
num_operated_closed[c] if c in num_operated_closed else pd.DataFrame(columns=['Operated closed']),
], axis=1)
combined = combined.reindex(years, fill_value=0)
combined['Commercial operation tot'] = combined['Commercial operation'].fillna(0).cumsum()
combined['Operated closed tot'] = combined['Operated closed'].fillna(0).cumsum()
combined['In operation'] = combined['Commercial operation tot'] - combined['Operated closed tot']
fig, ax = plt.subplots(2, 1, figsize=(11, 10))
ax1, ax2 = ax
combined['Begin building'].plot.bar(ax=ax1, label='Construction started')
(-combined['Closed']).plot.bar(ax=ax1, color='red', label='Closed')
ax1.set_title(f'{c} (UN M49)')
ax1.set_xlabel(None)
format_years_ticks(ax1)
format_number_plants_ticks(ax1, ymin, ymax)
ax1.text(0, -ymin, copyright_text, fontsize=copyright_font_size, verticalalignment='bottom')
ax1.legend()
combined['In operation'].plot.bar(ax=ax2, label='Number in operation')
combined['Commercial operation'].plot.bar(ax=ax2, color='black', label='Operation started')
format_years_ticks(ax2)
ax2.set_ylim(0, tot_ymax)
#ax2.text(0, ax2.get_ylim()[1], copyright_text, fontsize=copyright_font_size, verticalalignment='top')
ax2.legend()
fig.tight_layout()
Country stats¶
In [11]:
total_constructed = input_df.groupby(['Country'])[['Begin building', 'Commercial operation', 'Closed']].count().reset_index().sort_values('Begin building', ascending=False).reset_index(drop=True)
total_constructed
Out[11]:
| Country | Begin building | Commercial operation | Closed | |
|---|---|---|---|---|
| 0 | United States | 140 | 134 | 46 |
| 1 | China | 81 | 55 | 0 |
| 2 | France | 69 | 68 | 12 |
| 3 | Japan | 62 | 59 | 26 |
| 4 | Russia | 62 | 47 | 10 |
| 5 | United Kingdom | 46 | 44 | 35 |
| 6 | Germany | 39 | 36 | 39 |
| 7 | India | 31 | 23 | 1 |
| 8 | South Korea | 30 | 27 | 2 |
| 9 | Canada | 25 | 25 | 6 |
| 10 | Ukraine | 25 | 19 | 8 |
| 11 | Sweden | 14 | 13 | 8 |
| 12 | Spain | 14 | 10 | 7 |
| 13 | Slovakia | 9 | 8 | 3 |
| 14 | Belgium | 8 | 8 | 3 |
| 15 | Bulgaria | 8 | 6 | 4 |
| 16 | Taiwan | 8 | 6 | 6 |
| 17 | Pakistan | 7 | 7 | 1 |
| 18 | Italy | 6 | 4 | 6 |
| 19 | Switzerland | 6 | 6 | 2 |
| 20 | Czech Republic | 6 | 6 | 0 |
| 21 | Finland | 5 | 5 | 0 |
| 22 | Turkey | 4 | 0 | 0 |
| 23 | United Arab Emirates | 4 | 3 | 0 |
| 24 | Argentina | 4 | 3 | 0 |
| 25 | Belarus | 4 | 2 | 2 |
| 26 | Hungary | 4 | 4 | 0 |
| 27 | Brazil | 3 | 2 | 0 |
| 28 | Egypt | 3 | 0 | 0 |
| 29 | Netherlands | 2 | 2 | 1 |
| 30 | Armenia | 2 | 2 | 1 |
| 31 | Bangladesh | 2 | 0 | 0 |
| 32 | Cuba | 2 | 0 | 2 |
| 33 | Iran | 2 | 1 | 0 |
| 34 | South Africa | 2 | 2 | 0 |
| 35 | Lithuania | 2 | 2 | 2 |
| 36 | Mexico | 2 | 2 | 0 |
| 37 | Romania | 2 | 2 | 0 |
| 38 | Poland | 2 | 0 | 2 |
| 39 | Philippines | 1 | 0 | 1 |
| 40 | Slovenia | 1 | 1 | 0 |
| 41 | Austria | 1 | 0 | 1 |
| 42 | Kazakhstan | 1 | 1 | 1 |
In [12]:
num_building_started = input_df.groupby(['Country', 'Begin building'])['Begin building'].count()
num_connected = input_df.groupby(['Country', 'Commercial operation'])['Commercial operation'].count()
num_closed = input_df.groupby(['Country', 'Closed'])['Closed'].count()
num_operated_closed = input_df.groupby(['Country', 'Operated closed'])['Operated closed'].count()
countries = sorted(c for c in input_df['Country'].unique() if c in num_building_started)
#top_countries = total_constructed[total_constructed['Begin building'] >= 10]['Country'].to_list()
top_countries = total_constructed['Country'].to_list()
max_building_started = num_building_started.max()
max_num_closed = num_closed.max()
ymax = (max_building_started + 4) // 5 * 5
ymin = (max_num_closed + 4) // 5 * 5
tot_ymax = 110 # TODO: calculate from data
for c in top_countries:
combined = pd.concat([
num_building_started[c],
num_connected[c] if c in num_connected else pd.DataFrame(columns=['Commercial operation']),
num_closed[c] if c in num_closed else pd.DataFrame(columns=['Closed']),
num_operated_closed[c] if c in num_operated_closed else pd.DataFrame(columns=['Operated closed']),
], axis=1)
combined = combined.reindex(years, fill_value=0)
combined['Commercial operation tot'] = combined['Commercial operation'].fillna(0).cumsum()
combined['Operated closed tot'] = combined['Operated closed'].fillna(0).cumsum()
combined['In operation'] = combined['Commercial operation tot'] - combined['Operated closed tot']
fig, ax = plt.subplots(2, 1, figsize=(11, 10))
ax1, ax2 = ax
combined['Begin building'].plot.bar(ax=ax1, label='Construction started')
(-combined['Closed']).plot.bar(ax=ax1, color='red', label='Closed')
ax1.set_title(c)
ax1.set_xlabel(None)
format_years_ticks(ax1)
format_number_plants_ticks(ax1, ymin, ymax)
ax1.text(0, -ymin, copyright_text, fontsize=copyright_font_size, verticalalignment='bottom')
ax1.legend()
combined['In operation'].plot.bar(ax=ax2, label='Number in operation')
combined['Commercial operation'].plot.bar(ax=ax2, color='black', label='Operation started')
format_years_ticks(ax2)
ax2.set_ylim(0, tot_ymax)
#ax2.text(0, ax2.get_ylim()[1], copyright_text, fontsize=copyright_font_size, verticalalignment='top')
ax2.legend()
fig.tight_layout()
/var/folders/2z/kr9wj6s90nn6nkdsddywzyfw0000gn/T/ipykernel_26758/1497431667.py:31: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. fig, ax = plt.subplots(2, 1, figsize=(11, 10))
Never opened reactors¶
In [13]:
# Never opened
never_opened_df = input_df[input_df['Commercial operation'].isna() & input_df['Begin building'].notna() & input_df['Closed'].notna()].sort_values('Closed')
num_building_started = never_opened_df.groupby(['Begin building'])['Begin building'].count()
num_closed = never_opened_df.groupby(['Closed'])['Closed'].count()
combined = pd.concat([
num_building_started,
num_closed,
], axis=1)
combined = combined.reindex(years, fill_value=0)
fig, ax = plt.subplots(figsize=(11, 5))
combined['Begin building'].plot.bar(ax=ax, label='Construction started')
(-combined['Closed']).plot.bar(ax=ax, color='red', label='Closed')
ax.set_title('Never operated reactors')
ax.set_xlabel(None)
format_years_ticks(ax)
ax.yaxis.set_major_formatter(lambda val, pos: int(val))
ax.text(0, ax.get_ylim()[0], copyright_text, fontsize=copyright_font_size, verticalalignment='bottom')
ax.legend()
fig.tight_layout()
In [14]:
never_opened_df.groupby(['Country', 'Closed'])['Closed'].count().sort_values(ascending=False)
Out[14]:
Country Closed
Spain 1984 4
United States 1984 3
Belarus 1987 2
Cuba 1992 2
Germany 1990 2
Italy 1988 2
Poland 1990 2
Taiwan 2014 2
Ukraine 1987 2
1990 2
United States 1983 2
Austria 1978 1
Germany 1985 1
Philippines 1986 1
Sweden 1970 1
Name: Closed, dtype: Int64
In [15]:
never_opened_df.groupby(['Country', 'Begin building'])['Begin building'].count().sort_values(ascending=False)
Out[15]:
Country Begin building
United States 1975 4
Poland 1982 2
Ukraine 1988 2
1984 2
Germany 1983 2
Italy 1982 2
Taiwan 1999 2
Belarus 1983 2
Spain 1972 2
1975 2
Sweden 1965 1
Austria 1972 1
Philippines 1976 1
Germany 1972 1
Cuba 1985 1
1983 1
United States 1977 1
Name: Begin building, dtype: Int64